import requests
import w3cbs4
import time
class JsW3c:
    """Crawler for the JavaScript tutorial pages of w3school.com.cn.

    Pages are downloaded with ``requests`` and the decoded HTML is handed to
    ``w3cbs4.W3cBs4`` for parsing.
    """

    # Scheme + host that site-relative links are resolved against.
    w3cdomain = "http://www.w3school.com.cn"

    # Seconds to wait for the server before giving up. Without a timeout
    # ``requests`` blocks indefinitely on an unresponsive server.
    request_timeout = 10

    # Browser-like request headers sent with every request.
    # NOTE(review): the Cookie value looks like a session captured from a
    # browser and is presumably stale by now -- confirm whether it is needed.
    pc_headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
        'Cookie': '_ga=GA1.3.1826260650.1561516038; _gid=GA1.3.1055229536.1561516038; ASPSESSIONIDAABRSQSA=CEBMAFLAHMEIOMMKLJALMGNL',
        'Host': 'www.w3school.com.cn',
        'Referer': 'http://www.w3school.com.cn/',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36'
    }

    def __init__(self):
        pass

    def _fetch_html(self, url: str) -> str:
        """Download *url* and return its body decoded as GBK text.

        :param url: absolute URL of the page to download
        :return: the response body as text
        :raises requests.exceptions.RequestException: on connection failure
            or when the server does not answer within ``request_timeout``
        """
        response = requests.get(
            url,
            headers=self.pc_headers,
            timeout=self.request_timeout,
        )
        # The encoding is forced to GBK instead of relying on the charset
        # detected by requests.
        response.encoding = 'gbk'
        return response.text

    def start(self):
        """Start crawling: fetch the JS tutorial index page.

        Network errors (including timeouts) propagate to the caller.

        :return: whatever ``W3cBs4.left_menu()`` extracts from the index
            page. NOTE(review): presumably a sequence of dicts carrying a
            'menu_url' key that can be fed to ``child_url_contents`` --
            verify against w3cbs4.
        """
        w3c_js_url = "http://www.w3school.com.cn/js/index.asp"
        return w3cbs4.W3cBs4(self._fetch_html(w3c_js_url)).left_menu()

    def child_url_contents(self, url: str):
        """Fetch one tutorial page and extract its content.

        This is deliberately best-effort: any failure while downloading or
        parsing is printed and an empty list is returned, so that one bad
        page does not abort a whole crawl.

        :param url: site-relative path (e.g. '/js/js_output.asp') or an
            absolute URL
        :return: the result of ``W3cBs4.get_next_content()``, or ``[]`` on
            any error
        """
        try:
            page_html = self._fetch_html(self.gen_full_url(url))
            return w3cbs4.W3cBs4(page_html).get_next_content()
        except Exception as e:
            print(e)
            return []

    def gen_full_url(self, suffix: str) -> str:
        """Resolve *suffix* against ``w3cdomain``.

        :param suffix: a path such as '/js/index.asp'; a missing leading
            slash is tolerated, and absolute http(s) URLs are passed through
        :return: the absolute URL
        """
        if suffix.startswith(('http://', 'https://')):
            # Already absolute; prefixing the domain would corrupt it.
            return suffix
        if suffix and not suffix.startswith('/'):
            # Avoid gluing the path directly onto the host name.
            suffix = '/' + suffix
        return self.w3cdomain + suffix

if __name__ == '__main__':
    # Manual smoke test: fetch the tutorial index and dump the parsed menu.
    crawler = JsW3c()
    left_menu = crawler.start()
    print(left_menu)
    # To inspect a single page instead:
    # print(crawler.child_url_contents('/js/js_output.asp'))